In [1]:
import polars as pl
import pandas as pd
import numpy as np
from datetime import datetime
import plotly.express as px
import plotly.graph_objects as go
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.tools.tools import add_constant
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skewnorm, norm
import warnings
warnings.simplefilter(action='ignore')

from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATSx, NHITS
#from neuralforecast.losses import MAE
from neuralprophet import NeuralProphet
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import precision_score, recall_score, roc_auc_score, roc_curve, r2_score, f1_score, mean_squared_error, mean_absolute_error, mean_absolute_percentage_error, mean_squared_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler

from tqdm import tqdm

Inflation(GDP Deflator): Inflation is defined as a prolonged increase in the general price level of goods and services that erodes money's purchasing power. This might be caused by excessive aggregate demand, increased production costs (cost-push inflation), or central bank policy. Inflation can erode purchasing power, cause economic instability, and amplify income inequality. Central banks combat inflation by raising interest rates, selling government bonds, and increasing reserve requirements. Understanding inflation is critical for making informed decisions about saving and investing.

Unemployment rate: The unemployment rate is defined as the percentage of the labor force that is actively looking for work but is unable to find it, representing the state of the economy and job availability.The unemployment rate has a substantial impact on financial markets such as stocks, gold, and Bitcoin.High unemployment can lead to lower stock prices as consumer spending and corporate profits decline. Gold often shines in such times due to its safe-haven appeal, while Bitcoin can rise as an alternative investment amidst turmoil. Low unemployment, on the other hand, could prop up stock prices thanks to increased consumer spending and stronger corporate profits. Gold might lose its luster as economic strength reduces the need for safe havens, while Bitcoin's fate could be mixed based on investor risk appetite and its adoption potential.

GDP growth rate: The gross domestic product (GDP) growth rate, which measures the percentage change in the total market value of goods and services produced within a country over a specific period, serves as a crucial indicator of economic health and activity. Its influence on the financial market is significant. Increased economic activity leads to better business earnings, which drives up stock prices and boosts investor confidence. High-growth periods can raise risk appetite, thereby undermining gold's appeal as a haven asset. The link between GDP growth and cryptocurrency prices is complicated and changing.

GDP: Gross domestic product (GDP) represents the total monetary value of all final goods and services produced within a country during a specific period. This critical economic indicator influences a wide range of financial markets, including stocks, gold, and even Bitcoin. Economic growth fuels stock prices as corporate profits rise, while stagnant economies can lead to downturns. Gold is a safe investment during times of uncertainty, but its demand may wane during strong economic periods. Bitcoin's relationship with GDP is complex and requires further investigation. Understanding these dynamics empowers informed decision-making in the financial market.

Interest rate: Interest rates, or the cost of borrowing money, are a significant influence in the financial landscape, having enormous effects across a wide range of asset classes. Interest rates influence investment decisions, economic activity, and, ultimately, the functioning of financial markets by changing the cost of lending. Lower interest rates encourage borrowing and investment, potentially leading to higher business valuations and higher stock prices. Higher rates, on the other hand, discourage borrowing and investment, putting downward pressure on stock prices and necessitating a reconsideration of business values. Gold is sometimes used as an edge against inflation, with higher interest rates potentially benefiting from inflationary periods. Higher interest rates, on the other hand, promote investment in other asset types, thereby undermining gold's appeal.

CPI: The Consumer Price Index (CPI) tracks price fluctuations in a basket of products and services commonly purchased by urban consumers. This critical indicator serves as an inflation measure, influencing numerous areas of the financial market. Rising CPI means increased inflation, which may reduce company profits and discourage investment, potentially resulting in lower stock prices. Gold has traditionally been used to limit inflation, typically profiting from periods of rising CPI because its price rises in accordance with inflation.

Closing Price: The actual dataset had the opening, closing, low and high prices for each record. However we decided to move forward with the closing price instead. The closing price is the value of the last transacted price before the market officially closes for trading. However for Bitcoin, closing price generally refers to the price at 11:59 PM UTC of any given day.

In [2]:
df = pl.read_csv("SnP500.csv")
In [3]:
df.describe()
Out[3]:
shape: (9, 10)
statisticDateCloseVolumeInflationUnemploymentGDP_Growth_RateGDPInterest_rateCPI
strstrf64f64f64f64f64f64f64f64
"count""7555"7555.07555.07555.07555.07555.07555.07555.07555.0
"null_count""0"0.00.00.00.00.00.00.00.0
"mean"null1616.1116032.6369e92.151765.7525482.4638431.4420e133.3176142.489871
"std"null977.0934221.7951e91.1680991.6278491.8249535.0062e122.1328031.441926
"min""1/10/1993"429.0499881.499e70.6409553.65-2.7678036.8600e12-1.189357-0.355546
"25%"null1029.0300299.81e81.5585314.621.8418751.0300e132.0238851.622223
"50%"null1301.3499762.8136e91.899615.452.706371.4500e132.9605062.33769
"75%"null2050.6298833.8921e92.370346.173.7725651.8200e134.898312.951657
"max""9/9/2022"4796.5600591.1456e107.0052769.635.9454852.5500e137.1481788.0028
In [4]:
df['Date']
Out[4]:
shape: (7_555,)
Date
str
"4/1/1993"
"5/1/1993"
"6/1/1993"
"7/1/1993"
"8/1/1993"
…
"23-12-2022"
"27-12-2022"
"28-12-2022"
"29-12-2022"
"30-12-2022"
In [5]:
# Function to normalize date formats
def normalize_date(date_str):
    for fmt in ("%d-%m-%Y", "%d/%m/%Y"):
        try:
            return datetime.strptime(date_str, fmt).strftime("%d-%m-%Y")
        except ValueError:
            continue
    raise ValueError(f"Date format for {date_str} not recognized")
In [6]:
# Apply the normalization function to the Date column
df1 = df.with_columns(
    pl.col("Date").apply(normalize_date)
)
In [7]:
df1
Out[7]:
shape: (7_555, 9)
DateCloseVolumeInflationUnemploymentGDP_Growth_RateGDPInterest_rateCPI
strf64i64f64f64f64f64f64f64
"04-01-1993"435.3800052012100002.370346.92.7517816.8600e123.5456172.951657
"05-01-1993"434.3399962403500002.370346.92.7517816.8600e123.5456172.951657
"06-01-1993"434.5199892952400002.370346.92.7517816.8600e123.5456172.951657
"07-01-1993"430.7300113048500002.370346.92.7517816.8600e123.5456172.951657
"08-01-1993"429.0499882634700002.370346.92.7517816.8600e123.5456172.951657
………………………
"23-12-2022"3844.82006828192800007.0052763.652.0615932.5500e130.08.0028
"27-12-2022"3829.2530303000007.0052763.652.0615932.5500e130.08.0028
"28-12-2022"3783.21997130835200007.0052763.652.0615932.5500e130.08.0028
"29-12-2022"3849.28002930036800007.0052763.652.0615932.5500e130.08.0028
"30-12-2022"3839.529798700007.0052763.652.0615932.5500e130.08.0028
In [8]:
df1["Date"]
Out[8]:
shape: (7_555,)
Date
str
"04-01-1993"
"05-01-1993"
"06-01-1993"
"07-01-1993"
"08-01-1993"
…
"23-12-2022"
"27-12-2022"
"28-12-2022"
"29-12-2022"
"30-12-2022"
In [9]:
# Convert the 'Date' column to datetime
df1 = df1.with_columns(
    pl.col("Date").str.strptime(pl.Date))
In [10]:
df1["Date"]
Out[10]:
shape: (7_555,)
Date
date
1993-01-04
1993-01-05
1993-01-06
1993-01-07
1993-01-08
…
2022-12-23
2022-12-27
2022-12-28
2022-12-29
2022-12-30
In [11]:
# Extract year and quarter from the date
df1 = df1.with_columns([
    pl.col("Date").dt.year().alias("Year"),
    pl.col("Date").dt.quarter().alias("Quarter")
])
In [12]:
# Group by year and quarter and calculate the mean for each group
quarterly_df = df1.groupby(["Year", "Quarter"]).mean()

# Drop the Date column and rename the columns appropriately
quarterly_df = quarterly_df.drop("Date").rename({"Year": "Year", "Quarter": "Quarter"}).sort(["Year", "Quarter"])

quarterly_df
Out[12]:
shape: (120, 10)
YearQuarterCloseVolumeInflationUnemploymentGDP_Growth_RateGDPInterest_rateCPI
i32i8f64f64f64f64f64f64f64f64
19931442.7503212.6597e82.370346.92.7517816.8600e123.5456172.951657
19932445.5058722.6200e82.370346.92.7517816.8600e123.5456172.951657
19933453.5587482.5574e82.370346.92.7517816.8600e123.5456172.951657
19934464.2718742.75129375e82.370346.92.7517816.8600e123.5456172.951657
19941469.2134923.1279e82.1354246.124.0287937.2900e124.898312.607442
…………………………
202144602.1088944.0824e94.4927925.355.9454852.3300e13-1.1893574.697859
202214463.8554775.0287e97.0052763.652.0615932.5500e130.08.0028
202224105.6671024.9249e97.0052763.652.0615932.5500e130.08.0028
202233980.3511124.1903e97.0052763.652.0615932.5500e130.08.0028
202243851.9735014.3452e97.0052763.652.0615932.5500e130.08.0028
In [13]:
#quarterly_df.write_csv("Quarter.csv")
In [14]:
#quarterly_df = pd.DataFrame(quarterly_df)
#quarterly_df
In [15]:
quarterly_df.columns
Out[15]:
['Year',
 'Quarter',
 'Close',
 'Volume',
 'Inflation',
 'Unemployment',
 'GDP_Growth_Rate',
 'GDP',
 'Interest_rate',
 'CPI']
In [16]:
# Convert to Pandas DataFrame
quarterly_df_pandas = quarterly_df.to_pandas()
quarterly_df_pandas
Out[16]:
Year Quarter Close Volume Inflation Unemployment GDP_Growth_Rate GDP Interest_rate CPI
0 1993 1 442.750321 2.659718e+08 2.370340 6.90 2.751781 6.860000e+12 3.545617 2.951657
1 1993 2 445.505872 2.620033e+08 2.370340 6.90 2.751781 6.860000e+12 3.545617 2.951657
2 1993 3 453.558748 2.557414e+08 2.370340 6.90 2.751781 6.860000e+12 3.545617 2.951657
3 1993 4 464.271874 2.751294e+08 2.370340 6.90 2.751781 6.860000e+12 3.545617 2.951657
4 1994 1 469.213492 3.127857e+08 2.135424 6.12 4.028793 7.290000e+12 4.898310 2.607442
... ... ... ... ... ... ... ... ... ... ...
115 2021 4 4602.108894 4.082385e+09 4.492792 5.35 5.945485 2.330000e+13 -1.189357 4.697859
116 2022 1 4463.855477 5.028659e+09 7.005276 3.65 2.061593 2.550000e+13 0.000000 8.002800
117 2022 2 4105.667102 4.924918e+09 7.005276 3.65 2.061593 2.550000e+13 0.000000 8.002800
118 2022 3 3980.351112 4.190339e+09 7.005276 3.65 2.061593 2.550000e+13 0.000000 8.002800
119 2022 4 3851.973501 4.345159e+09 7.005276 3.65 2.061593 2.550000e+13 0.000000 8.002800

120 rows × 10 columns

In [17]:
# Plot 1: Line plot of Close prices over time
fig1 = px.line(quarterly_df_pandas, x='Year', y='Close', color='Quarter', title='Close Prices Over Time')
fig1.show()
In [18]:
# Plot 2: Line plot of GDP over time
fig2 = px.line(quarterly_df_pandas, x='Year', y='GDP', color='Quarter', title='GDP Over Time')
fig2.show()
In [19]:
# Plot 3: Line plot of Inflation over time
fig3 = px.line(quarterly_df_pandas, x='Year', y='Inflation', color='Quarter', title='Inflation Over Time')
fig3.show()
In [20]:
# Plot 4: Line plot of Unemployment over time
fig4 = px.line(quarterly_df_pandas, x='Year', y='Unemployment', color='Quarter', title='Unemployment Over Time')
fig4.show()
In [21]:
# Plot 5: Line plot of Interest Rate over time
fig5 = px.line(quarterly_df_pandas, x='Year', y='Interest_rate', color='Quarter', title='Interest Rate Over Time')
fig5.show()
In [22]:
# Plot 6: Line plot of CPI over time
fig6 = px.line(quarterly_df_pandas, x='Year', y='CPI', color='Quarter', title='CPI Over Time')
fig6.show()
In [23]:
# Plot 7: Line plot of GDP Growth Rate over time
fig7 = px.line(quarterly_df_pandas, x='Year', y='GDP_Growth_Rate', color='Quarter', title='GDP Growth Rate Over Time')
fig7.show()
In [24]:
# Plot 8: Line plot of Volume over time
fig8 = px.line(quarterly_df_pandas, x='Year', y='Volume', color='Quarter', title='Volume Over Time')
fig8.show()
In [25]:
#Dropping unnecessary features for further visualisation
quarterly_df_pd = quarterly_df_pandas.drop(columns=["Year","Quarter"])
In [26]:
#Distplot and Boxplot for each feature - Data distribution and Outlier Detection
plt.figure(figsize=[20,60])
columns = quarterly_df_pd.columns
cnt = 1
for col in columns:
    plt.subplot(14, 2, cnt)
    sns.distplot(quarterly_df_pd[col], fit=norm)
    cnt += 1
    plt.subplot(14, 2, cnt)
    sns.boxplot(quarterly_df_pd[col])
    cnt += 1
plt.tight_layout()
plt.show()

1) Features doesn't seem to align close with normal distribution except for CPI. 2) Close, Inflation, Unemployment, GDP Growth Rate and CPI are having outliers.

In [27]:
# Outlier Treatment - Values below the lower bound are replaced with the lower bound, 
# and values above the upper bound are replaced with the upper bound.

# Function to detect and treat outliers using IQR method
def treat_outliers(df, column):
    Q1 = df[column].quantile(0.25)  # Q1 is the 25th percentile, and                                  
    Q3 = df[column].quantile(0.75)  # Q3 is the 75th percentile of the data.
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR  
    upper_bound = Q3 + 1.5 * IQR  
    df[column] = np.where(df[column] < lower_bound, lower_bound, df[column])
    df[column] = np.where(df[column] > upper_bound, upper_bound, df[column])
    return df

# List of columns to treat for outliers
columns_to_treat = ['Close', 'Inflation', 'Unemployment', 'GDP_Growth_Rate', 'CPI']

# Apply outlier treatment to each column
for column in columns_to_treat:
    quarterly_df_pd_treated = treat_outliers(quarterly_df_pd, column)
In [28]:
quarterly_df_pd_treated.describe()
Out[28]:
Close Volume Inflation Unemployment GDP_Growth_Rate GDP Interest_rate CPI
count 120.000000 1.200000e+02 120.000000 120.000000 120.000000 1.200000e+02 120.000000 120.000000
mean 1570.962310 2.638766e+09 2.008767 5.673167 2.571960 1.442433e+13 3.316396 2.388689
std 864.223491 1.701057e+09 0.721140 1.471253 1.545983 5.025693e+12 2.140971 1.126728
min 442.750321 2.557414e+08 0.640955 3.650000 -1.054160 6.860000e+12 -1.189357 -0.355546
25% 1048.111877 1.061234e+09 1.558531 4.620000 1.841875 1.030000e+13 2.023885 1.622223
50% 1295.423167 3.122430e+09 1.908772 5.400000 2.695293 1.450000e+13 2.776468 2.390137
75% 2033.477972 3.887069e+09 2.370340 6.170000 3.772565 1.820000e+13 4.898310 2.951657
max 3511.527113 6.531582e+09 3.588052 8.495000 5.945485 2.550000e+13 7.148178 4.945808
In [29]:
#plt.figure(figsize=[60, 20])
#cnt = 1
#out_col = quarterly_df_pd_treated.columns
#for col in out_col:
#    plt.subplot(4, 3, cnt)
#    sns.boxplot(quarterly_df_pd_treated[col])
#    cnt += 1
#plt.tight_layout()
#plt.show()
In [30]:
# Calculate the correlation matrix
correlation_matrix = quarterly_df_pd_treated.corr()

# Create a heatmap
fig_corr = go.Figure(data=go.Heatmap(
                   z=correlation_matrix.values,
                   x=correlation_matrix.columns,
                   y=correlation_matrix.columns,
                   colorscale='Viridis'))

fig_corr.update_layout(title='Correlation Heatmap')
fig_corr.show()
In [31]:
# Drop the target variable
independent_variables = quarterly_df_pd_treated.drop(columns=['Unemployment'])
# Calculate VIF for each independent variable
# A high VIF value (typically greater than 10) indicates that the variance 
# of the coefficient estimate for that variable is inflated due to multicollinearity.
vif = pd.DataFrame()
vif["VIF Factor"] = [variance_inflation_factor(independent_variables.values, i) for i in range(independent_variables.shape[1])]
vif["features"] = independent_variables.columns
vif
Out[31]:
VIF Factor features
0 8.460349 Close
1 8.136021 Volume
2 5.882536 Inflation
3 1.787589 GDP_Growth_Rate
4 158.537938 GDP
5 1.025189 Interest_rate
6 5.205215 CPI
  • GDP has an extremely high VIF value (158.54), indicating very strong multicollinearity with other features. This feature should be removed to reduce multicollinearity.
  • Close and Volume have moderately high VIF values.
  • Inflation and CPI have moderate VIF values.
In [32]:
# Prepare the data for time series forecasting
quarterly_df_pandas['ds'] = pd.to_datetime(quarterly_df_pandas['Year'].astype(str) + 'Q' + quarterly_df_pandas['Quarter'].astype(str))
quarterly_df_pandas = quarterly_df_pandas.sort_values(by='ds')
In [33]:
#quarterly_df_pandas.set_index('ds', inplace=True)
In [34]:
quarterly_df_pandas.columns
Out[34]:
Index(['Year', 'Quarter', 'Close', 'Volume', 'Inflation', 'Unemployment',
       'GDP_Growth_Rate', 'GDP', 'Interest_rate', 'CPI', 'ds'],
      dtype='object')

Model implementation without GDP¶

In [36]:
# Select the relevant columns
ts_data = quarterly_df_pandas[['ds', 'Unemployment', 'Close', 'Volume', 'Inflation', 'GDP_Growth_Rate', 'Interest_rate', 'CPI']]
ts_data.columns = ['ds', 'y', 'Close', 'Volume', 'Inflation', 'GDP_Growth_Rate', 'Interest_rate', 'CPI']
ts_data['unique_id'] = 1  # Add a unique_id column for the NeuralForecast class

#Feature Scaling
scaler = MinMaxScaler()
columns_to_normalize = ['y', 'Close', 'Volume', 'Inflation', 'GDP_Growth_Rate', 'Interest_rate', 'CPI']
ts_data[columns_to_normalize] = scaler.fit_transform(ts_data[columns_to_normalize])

# Split the data into training and test sets
train_size = int(len(ts_data) * 0.8)
train_data = ts_data[:train_size]
test_data = ts_data[train_size:]

# Initialize the models
nbeatsx_model = NBEATSx(h=len(test_data), input_size= 2*len(test_data), max_steps=50)
nhits_model = NHITS(h=len(test_data), input_size= 2*len(test_data), max_steps=50)

# Initialize the NeuralForecast class
nf = NeuralForecast(models=[nbeatsx_model, nhits_model], freq='Q')

# Fit the models
nf.fit(train_data)

# Make predictions
predictions = nf.predict(test_data)

# Extract predictions for each model
nbeatsx_forecast = predictions[['ds', 'NBEATSx']]
nhits_forecast = predictions[['ds', 'NHITS']]

# Calculate performance metrics for NBEATSx
mae_nbeatsx = mean_absolute_error(test_data['y'], nbeatsx_forecast['NBEATSx'])
mse_nbeatsx = mean_squared_error(test_data['y'], nbeatsx_forecast['NBEATSx'])
rmse_nbeatsx = np.sqrt(mse_nbeatsx)

# Calculate performance metrics for NHITS
mae_nhits = mean_absolute_error(test_data['y'], nhits_forecast['NHITS'])
mse_nhits = mean_squared_error(test_data['y'], nhits_forecast['NHITS'])
rmse_nhits = np.sqrt(mse_nhits)

# Create a DataFrame to return the results in tabular format
results_df = pd.DataFrame({
    'Models': ['NBEATSx', 'NHITS'],
    'MAE': [mae_nbeatsx, mae_nhits],
    'MSE': [mse_nbeatsx, mse_nhits],
    'RMSE': [rmse_nbeatsx, rmse_nhits]
})

results_df
Seed set to 1
Seed set to 1
Sanity Checking: |                                                                               | 0/? [00:00<…
Training: |                                                                                      | 0/? [00:00<…
Validation: |                                                                                    | 0/? [00:00<…
Sanity Checking: |                                                                               | 0/? [00:00<…
Training: |                                                                                      | 0/? [00:00<…
Validation: |                                                                                    | 0/? [00:00<…
Predicting: |                                                                                    | 0/? [00:00<…
Predicting: |                                                                                    | 0/? [00:00<…
Out[36]:
Models MAE MSE RMSE
0 NBEATSx 0.181878 0.074991 0.273845
1 NHITS 0.210793 0.060834 0.246646

Given that MSE and RMSE are often considered more important metrics because they penalize larger errors more heavily, NHITS can be considered the better model overall in this case.

In [46]:
train_data.set_index('ds', inplace=True)
test_data.set_index('ds', inplace=True)
predictions.set_index('ds', inplace=True)
# Plot the results2
plt.figure(figsize=(10, 6))
plt.plot(train_data['y'], label='Training Data')
plt.plot(test_data['y'], label='Test Data')
plt.plot(predictions['NHITS'], label='Predictions', color='red')
plt.legend()
plt.title('Quarterly Unemployment Forecast')
plt.xlabel('Date')
plt.ylabel('Unemployment')
plt.show()

predictions
Out[46]:
NBEATSx NHITS
ds
2022-12-31 0.028557 0.020406
2023-03-31 0.007411 -0.003748
2023-06-30 0.014606 0.026440
2023-09-30 -0.024062 0.040145
2023-12-31 0.029705 0.055193
2024-03-31 -0.037909 0.073615
2024-06-30 -0.075761 0.138846
2024-09-30 -0.040313 0.174221
2024-12-31 -0.016863 0.179870
2025-03-31 -0.012247 0.191009
2025-06-30 -0.040432 0.217390
2025-09-30 0.037842 0.264717
2025-12-31 -0.009255 0.320639
2026-03-31 0.143441 0.371401
2026-06-30 0.189561 0.419133
2026-09-30 0.181166 0.441882
2026-12-31 0.193167 0.430465
2027-03-31 0.228466 0.414037
2027-06-30 0.285549 0.420930
2027-09-30 0.208643 0.443728
2027-12-31 0.173339 0.454692
2028-03-31 0.260231 0.401967
2028-06-30 0.252845 0.365177
2028-09-30 0.162625 0.369130
In [47]:
# insample_prediction
Y_hat_insample = nf.predict_insample(step_size=len(test_data))
plt.figure(figsize=(10, 5))
plt.plot(Y_hat_insample['ds'], Y_hat_insample['y'], label='True')
plt.plot(Y_hat_insample['ds'], Y_hat_insample['NHITS'], label='Forecast')
plt.axvline(Y_hat_insample['ds'].iloc[-12], color='black', linestyle='--', label='Train-Test Split')
plt.xlabel('Timestamp [t]')
plt.ylabel('')
plt.grid()
plt.legend()
Predicting: |                                                                                    | 0/? [00:00<…
Predicting: |                                                                                    | 0/? [00:00<…
Out[47]:
<matplotlib.legend.Legend at 0x224d46fdfd0>

Model implementation with GDP(avoiding vif results)¶

In [42]:
# Select the relevant columns
ts_data1 = quarterly_df_pandas[['ds', 'Unemployment', 'Close', 'Volume', 'Inflation', 'GDP_Growth_Rate', 'GDP', 'Interest_rate', 'CPI']]
ts_data1.columns = ['ds', 'y', 'Close', 'Volume', 'Inflation', 'GDP_Growth_Rate', 'GDP', 'Interest_rate', 'CPI']
ts_data1['unique_id'] = 2  # Add a unique_id column for the NeuralForecast class


#Feature Scaling
scaler = MinMaxScaler()
columns_to_normalize1 = ['y', 'Close', 'Volume', 'Inflation', 'GDP_Growth_Rate', 'GDP', 'Interest_rate', 'CPI']
ts_data1[columns_to_normalize1] = scaler.fit_transform(ts_data1[columns_to_normalize1])

# Split the data into training and test sets
train_size1 = int(len(ts_data1) * 0.8)
train_data1 = ts_data1[:train_size]
test_data1 = ts_data1[train_size:]

# Initialize the models
nbeatsx_model1 = NBEATSx(h=len(test_data1), input_size= 2*len(test_data1), max_steps=50)
nhits_model1 = NHITS(h=len(test_data1), input_size= 2*len(test_data1), max_steps=50)

# Initialize the NeuralForecast class
nf1 = NeuralForecast(models=[nbeatsx_model1, nhits_model1], freq='Q')

# Fit the models
nf1.fit(train_data1)

# Make predictions
predictions1 = nf1.predict(test_data1)

# Extract predictions for each model
nbeatsx_forecast1 = predictions1[['ds', 'NBEATSx']]
nhits_forecast1 = predictions1[['ds', 'NHITS']]

# Calculate performance metrics for NBEATSx
mae_nbeatsx1 = mean_absolute_error(test_data1['y'], nbeatsx_forecast1['NBEATSx'])
mse_nbeatsx1 = mean_squared_error(test_data1['y'], nbeatsx_forecast1['NBEATSx'])
rmse_nbeatsx1 = np.sqrt(mse_nbeatsx1)

# Calculate performance metrics for NHITS
mae_nhits1 = mean_absolute_error(test_data1['y'], nhits_forecast1['NHITS'])
mse_nhits1 = mean_squared_error(test_data1['y'], nhits_forecast1['NHITS'])
rmse_nhits1 = np.sqrt(mse_nhits1)

# Create a DataFrame to return the results in tabular format
results_df1 = pd.DataFrame({
    'Models': ['NBEATSx', 'NHITS'],
    'MAE': [mae_nbeatsx1, mae_nhits1],
    'MSE': [mse_nbeatsx1, mse_nhits1],
    'RMSE': [rmse_nbeatsx1, rmse_nhits1]
})

results_df1
Seed set to 1
Seed set to 1
Sanity Checking: |                                                                               | 0/? [00:00<…
Training: |                                                                                      | 0/? [00:00<…
Validation: |                                                                                    | 0/? [00:00<…
Sanity Checking: |                                                                               | 0/? [00:00<…
Training: |                                                                                      | 0/? [00:00<…
Validation: |                                                                                    | 0/? [00:00<…
Predicting: |                                                                                    | 0/? [00:00<…
Predicting: |                                                                                    | 0/? [00:00<…
Out[42]:
Models MAE MSE RMSE
0 NBEATSx 0.181878 0.074991 0.273845
1 NHITS 0.210793 0.060834 0.246646
In [48]:
train_data1.set_index('ds', inplace=True)
test_data1.set_index('ds', inplace=True)
predictions1.set_index('ds', inplace=True)
# Plot the results2
plt.figure(figsize=(10, 6))
plt.plot(train_data1['y'], label='Training Data')
plt.plot(test_data1['y'], label='Test Data')
plt.plot(predictions1['NHITS'], label='Predictions', color='red')
plt.legend()
plt.title('Quarterly Unemployment Forecast')
plt.xlabel('Date')
plt.ylabel('Unemployment')
plt.show()

predictions1 
Out[48]:
NBEATSx NHITS
ds
2022-12-31 0.028557 0.020406
2023-03-31 0.007411 -0.003748
2023-06-30 0.014606 0.026440
2023-09-30 -0.024062 0.040145
2023-12-31 0.029705 0.055193
2024-03-31 -0.037909 0.073615
2024-06-30 -0.075761 0.138846
2024-09-30 -0.040313 0.174221
2024-12-31 -0.016863 0.179870
2025-03-31 -0.012247 0.191009
2025-06-30 -0.040432 0.217390
2025-09-30 0.037842 0.264717
2025-12-31 -0.009255 0.320639
2026-03-31 0.143441 0.371401
2026-06-30 0.189561 0.419133
2026-09-30 0.181166 0.441882
2026-12-31 0.193167 0.430465
2027-03-31 0.228466 0.414037
2027-06-30 0.285549 0.420930
2027-09-30 0.208643 0.443728
2027-12-31 0.173339 0.454692
2028-03-31 0.260231 0.401967
2028-06-30 0.252845 0.365177
2028-09-30 0.162625 0.369130
In [49]:
# insample_prediction
Y_hat_insample1 = nf1.predict_insample(step_size=len(test_data1))
plt.figure(figsize=(10, 5))
plt.plot(Y_hat_insample1['ds'], Y_hat_insample1['y'], label='True')
plt.plot(Y_hat_insample1['ds'], Y_hat_insample1['NHITS'], label='Forecast')
plt.axvline(Y_hat_insample1['ds'].iloc[-12], color='black', linestyle='--', label='Train-Test Split')
plt.xlabel('Timestamp [t]')
plt.ylabel('')
plt.grid()
plt.legend()
Predicting: |                                                                                    | 0/? [00:00<…
Predicting: |                                                                                    | 0/? [00:00<…
Out[49]:
<matplotlib.legend.Legend at 0x224d48c6ac0>
In [ ]: